*!! BLIND
*!! SL quant analysis
*!! Written by Brian Fitzpatrick (bfitzpatrick@gibsonconsult.com)

* ---- Session setup ----
* Clear stored estimates and run the project preamble (user-written command;
* presumably opens the log and defines the path globals used below, such as
* final, results and output - TODO confirm against the preamble definition).
eststo clear
preamble, f(BLIND\2023-2024\summer_learning) log(04_quant_analysis)

* Section switches: 1 runs the section, 0 skips it.
global regs 0
global resid 0
global margins 1

* File type passed to graphsout and used as the extension when copying figures.
global graph svg //png pdf

* sp is the folder that receives copies of exported tables and figures.
* It is only known for one user; anyone else must add their own path here.
* Stop with an explicit error message and return code rather than relying on
* an unrecognized command to halt the do-file.
if `"`c(username)'"' == `"BrianFitzpatrick"' global sp "C:\Users\BrianFitzpatrick\OneDrive - GIBSON\Documents\BLIND Schools\FOCUS\Summer Learning\Year 3 (23-24)\da\quant_analysis"
else {
	display as error "ERROR - ENTER SP DIRECTORY (no sp path is defined for this user)"
	exit 198
}

use `"${final}/all_admin_data"' , clear

* ---- Final data preparation ----
* Inspect the raw race and gifted/talented codes before recoding.
fre race gt

* Milestone score rescaled by dividing by 10.
summarize ms_score*
generate mean_ms_score10 = mean_ms_score/10
summarize mean_ms_score10
label variable mean_ms_score10 "Mean Milestone Score (x10)"

* Five-category race variable. Every code not listed lands in "Other"
* (recode's else rule also captures missing values).
recode race (8=1 "White") (3=2 "Black (ref. White)") (4=3 "Hispanic") (2=4 "Asian") (else=5 "Other"), generate(race5)

* Numeric school identifiers built from the school-name strings.
* NOTE(review): sl_school_id and sl_school are both encoded from
* sl_school_name; sl_school_id is not used again in this file - confirm it is needed.
encode sl_school_name, generate(sl_school_id)
encode sl_school_name, generate(sl_school)
encode sy_school_name, generate(sy_school)

* Variable and value labels.
label variable sex "Female Students (ref. Male Students)"
*la def disadv 0 "Eco. Adv." 1 "Eco. Dis.", replace
label define gt 0 "Not Enrolled in Gifted/Talented" 1 "Enrolled in Gifted/Talented"
label values gt gt
*la def i504 0 "No 504", modify

* Collapse zone codes into three groups: codes 3-5 become 1, codes 6-9 become 2,
* code 2 becomes 3 (Virtual) and code 1 is set to missing.
fre zone
recode zone (1=.) (3/5 = 1 ) (6/9 = 2 ) , generate(half)
replace half = 3 if zone == 2
label define h 1 "Zones 1-3" 2 "Zones 4-7" 3 "Virtual"
label values half h

* Build the value label zone2: code 2 is "V" and codes 3 to 9 are "1" to "7".
* NOTE(review): zone2 is defined here but never attached to a variable in
* this file - confirm whether a label values statement is missing.
local zone_labels
forvalues z = 1/7 {
	local code = `z' + 2
	local zone_labels `zone_labels' `code' `"`z'"'
}
label define zone2 2 "V" `zone_labels', replace

* Zone code 1 is excluded from the analysis.
drop if zone == 1

* Indicator for the virtual summer-learning site.
* NOTE(review): 16 is the code encode assigned to that site; it depends on
* the alphabetical order of the school names in the data - verify after any data refresh.
fre sl_school
generate vir_sl = (sl_school == 16) if !missing(sl_school)
label variable vir_sl "Virtual SL"

* Cross-checks of virtual status, low attendance and disadvantage.
fre vir_sl
tabulate vir_sl vir_enroll
tabulate vir_sl low_att, missing
tabulate vir_enroll low_att, missing
tabulate disadv low_att, row
bysort disadv: summarize dayswithatleastoneperiodab if vir_enroll == 0
**********************************Descriptives**********************************
**********************************Descriptives**********************************
**********************************Descriptives**********************************
* Variable lists shared by every later section.
*   iv    - model covariates in factor-variable notation
*           (cumulativegpa and mean_ms_score10 are currently left out)
*   iv2   - the same covariates as plain variable names, without grade
*   dv    - the three binary outcomes
*   tests - achievement and zone variables examined one at a time
global iv disadv sped gt i504 i.race5 sex lep i.grade
global iv2 disadv sped gt i504 race5 sex lep
global dv recommended attended low_att
global tests ms_quart_ela ms_quart_math zone gpa_cat
summarize recommended attended low_att $iv2

tabulate grade attended

* Drop school-year schools whose attendance rate is exactly zero.
bysort sy_school: egen att_rate = mean(attended)
fre sy_school if att_rate == 0
tabulate sy_school zone if att_rate == 0
drop if att_rate == 0
drop att_rate

* Keep only observations that are complete on the outcomes, covariates,
* school and zone; the regression is run solely to obtain e(sample).
quietly regress recommended attended $iv2 sy_school zone
generate sample = e(sample)
fre sample
tabulate sample attended
fre sy_school if sample == 0
*!BF Note 10/19/2023: so, this is a few schools that merged to no students and a couple hundred students with no SY school
keep if sample == 1
drop sample

* Students with complete data
distinct studentpersonkey
* ... and, by school level, those who also have a test score or GPA
distinct studentpersonkey if school_level == 1 & !missing(mean_ms_score)
distinct studentpersonkey if school_level == 2 & !missing(mean_ms_score)
distinct studentpersonkey if school_level == 3 & !missing(cumulativegpa)

* Outcome distributions, overall and by school level.
fre recommended attended low_att
tabulate recommended attended, col
foreach outcome of global dv {
	display `"`outcome'"'
	tabulate school_level `outcome', row
}

tabulate vir_enroll low_att, row

* GPA measures are blanked out below grade 9.
fre grade if !missing(cumulativegpa)
replace cumulativegpa = . if grade < 9
replace gpa_cat = . if grade < 9

* Grade coverage of each test, zone and GPA variable.
fre grade if !missing(ms_quart_ela)
fre grade if !missing(ms_quart_math)
foreach testvar of varlist $tests {
	display `"`testvar'"'
	fre grade if !missing(`testvar')
}

table race5, statistic(mean mean_ms_score cumulativegpa) nformat(%5.0f)

tabulate ms_quart_ela low_att, row
tabulate gpa_cat recommended, row
tabulate gpa_cat low_att, row
tabulate vir_enroll low_att, row
fre vir_enroll

fre low_att
tabulate school_level low_att, row

table ms_quart_ela, statistic(fvpercent recommended)
table ms_quart_math, statistic(fvpercent recommended) //both round to 100 in the lowest quartile!

********************************End Descriptives********************************
********************************End Descriptives********************************
********************************End Descriptives********************************

* Regression N-sizes: for each covariate and test variable, summarize the
* three outcomes over the observations where that variable is not missing.
foreach covar of varlist $iv2 $tests {
	display `"V=`covar'"'
	summarize $dv if !missing(`covar')
}

***********************************Regression***********************************
***********************************Regression***********************************
***********************************Regression***********************************
* Flag students who have a mean Milestone score and check coverage by grade.
gen has_ms=!mi(mean_ms_score)
tab grade has_ms

* Regression section; runs only when the regs switch is set to 1.
* For every outcome in the dv list it
*   1. fits one logit on the full covariate list and collects the estimates
*      into the reg_models collection,
*   2. saves one margins dataset per covariate in iv2, and
*   3. fits one single-predictor logit per test variable and saves its margins.
* The margins datasets are what the margins-plots section reads back in.
* The collection is then laid out, styled and exported to Word.
if $regs == 1 {
	collect clear
	collect create reg_models

	foreach v in $dv {
		*for regression models
		* The estimates are also stored under the name e so that they can be
		* restored after each margins call, whose post option overwrites e().
		collect _r_b _r_se e(N), name(reg_models) tag(model[(`v')]) : eststo e: logit `v' $iv , or vce(cl school_id) //one regression per dep. var.
		collect stars _r_p 0.001 "***" 0.01 "**" 0.05 "*", attach(_r_b)
		foreach ind in $iv2 {
			est restore e
			*one margins command, and one dataset, for each ind. var.
			levelsof `ind'
			margins, at(`ind'=(`r(levels)')) post saving("${results}/impact_estimates/`v'_`ind'_margins.dta", replace)
		}

		*for test score descriptives
		foreach ind of varlist $tests {
			eststo: logit `v' i.`ind' , or vce(cl school_id)
			levelsof `ind'
			margins, at(`ind'=(`r(levels)')) post saving("${results}/impact_estimates/`v'_`ind'_margins.dta", replace)
		}
		
	}
	**finalize table
	// REMOVE THE VERTICAL LINE
	collect layout (colname#result result[N]) (model), name(reg_models)
	collect style cell border_block, border(right, pattern(nil))
	collect style row stack, spacer delimiter(" x ")
	// FORMAT THE NUMBERS
	collect style cell, nformat(%5.2f)
	// PUT PARENTHESES AROUND THE STANDARD ERRORS
	collect style cell result[_r_se], sformat("(%s)")
	collect style cell result[N], nformat(%6.0fc)
	collect style header result, level(hide)	
	collect style column, extraspace(1)	
	collect style header result[N], level(label)		
	collect label levels result N "Observations", modify				
	collect style showbase off
	* Table note: fixed the missing word in "standard errors in parentheses".
	* NOTE(review): the models are fit with the or option, so the collected
	* _r_b values are presumably odds ratios rather than raw coefficients -
	* confirm and adjust the wording of the note if so.
	collect style putdocx, layout(autofitcontents) title("Table X: Models Predicting SL Invite & Attendance") note(Note. Each cell contains a coefficient, with standard errors in parentheses. * p<0.05, ** p<0.01, and *** p<0.001.) note(Source. Gibson's analysis of BLIND extant student testing data.) 
	collect preview

	**Begin export
	* Write the table to the output folder, then copy it to the sp folder.
	putdocx clear
	putdocx begin
	putdocx collect, tablename(reg_models)
	putdocx table reg_models(1,.), font(Arial, 9, black)	
	putdocx table reg_models(2,.), shading("${giblue}") font(Arial, 9, white) bold
	putdocx save "${output}/reg_tables", replace
	copy "${output}/reg_tables.docx" "${sp}/reg_tables.docx", replace
}
*********************************End Regression*********************************
*********************************End Regression*********************************
*********************************End Regression*********************************

*******************************Residual Analysis********************************
*******************************Residual Analysis********************************
*******************************Residual Analysis********************************
*!BF Note 7/30/2025: This analysis has school names attached, so I cannot include the results in public files
* Residual analysis; runs only when the resid switch is set to 1.
* Steps:
*   1. fit a logit of attended within each school level and store the predictions,
*   2. average the predictions by school-year school,
*   3. collapse the student data to school level and merge the averages in,
*   4. compare actual and predicted attendance rates, draw scatter plots, and
*      export the five schools per level with the largest positive gap.
if $resid == 1 {
	
	*for SL residual analysis
	* Despite its name, resid_attend holds the model prediction, not a residual
	* (predict with no option after logit returns the predicted probability).
	* The score covariate is cumulativegpa for school level 3 and
	* mean_ms_score for every other level.
	gen resid_attend=.
	levelsof school_level 
	foreach l in `r(levels)' {
		if `l'==3 local score cumulativegpa
		else local score mean_ms_score
		logit attended $iv `score' if school_level==`l'
		predict temp if school_level==`l'
		replace resid_attend=temp if school_level==`l'
		drop temp
	}
		
	*predict residuals
	* Regressing the predictions on school indicators and taking margins over
	* sy_school yields one mean predicted value per school, saved to disk.
	reg resid_attend i.sy_school
	fre attended if e(sample)==1
	margins, over(sy_school) post saving("${results}/impact_estimates/resid_attend.dta", replace)
	
	* Reload the saved margins as a school-level lookup held in a tempfile.
	preserve 
	use "${results}/impact_estimates/resid_attend.dta", clear
	keep _by1 _margin _ci_lb _ci_ub
	rename _by1 sy_school
	rename _margin pred_attend_rate
	sort pred_attend_rate
	list sy_school pred_attend_rate
	tempfile tf 
	save `tf', replace
	restore 
	
	* Collapse students to one row per school: means become rates, sums become counts.
	preserve
	gen school_size=1
	gen att_num=attended
	gen rec_num=recommended
	collapse (mean) attended disadv recommended vir_sl (sum) school_size att_num rec_num, by(sy_school half zone school_level)
	
	* Rows with a single student are dropped; presumably this also removes any
	* school that would otherwise appear twice and break the 1:1 merge - confirm.
	drop if sy_school==.
	list sy_school if school_size==1 //odd that this happened twice
	drop if school_size==1
	rename attended attend_rate
	rename recommended rec_rate
	merge 1:1 sy_school using `tf'
	keep if _merge==3
	fre sy_school if attend_rate==0
	rename disadv disadv_rate
	corr *rate
	
	la var pred_attend_rate "Predicted Attendance Rate"
	la var disadv_rate "Percent of Students Disadvantaged"
	la var rec_rate "Percent of Students Recmd. for SL"
	
	
	* resid is the actual minus the predicted rate, computed before the re-centering below.
	gen resid=attend_rate-pred_attend_rate
	sum resid pred_attend_rate attend_rate
	*!BF Note 10/18/2023: I looked into it, and this stems from issues with my means not adjusting for school size - so I readjust here
	* Shift the predicted rate by the mean of resid within each school level.
	* resid itself is not recomputed, so the second summarize still shows the
	* unshifted values; the shifted gap is stored in diff.
	levelsof school_level
	foreach l in `r(levels)' {
		sum resid if school_level==`l'
		replace pred_attend_rate=pred_attend_rate+`r(mean)' if school_level==`l'
	}
	sum resid pred_attend_rate attend_rate

	gen diff=attend_rate-pred_attend_rate
	* Convert the rates to percentages in place and keep a whole-number copy with an r prefix.
	foreach v in attend_rate pred_attend_rate disadv_rate rec_rate vir_sl {
		replace `v'=`v'*100
		gen r`v'=round(`v', 1)
	}
	gen rdiff=rattend_rate-rpred_attend_rate
	
	* Flag the last five rows per school level when sorted by resid, that is, the
	* five largest values. The shift above is constant within a level, so the
	* ordering matches the shifted gap. Missing values sort last in Stata, so a
	* school with missing resid would also be flagged.
	* s2 carries the school name for flagged schools only and is used as the marker label.
	bys school_level (resid): gen exceptional=_n>_N-5
	tab school_level exceptional 
	decode sy_school, gen(s2)
	replace s2="" if exceptional==0
	encode s2, gen(s3)
	
	list sy_school rrec_rate rattend_rate rpred_attend_rate rdisadv_rate school_size

	* One scatter plot per x variable and school level: attendance rate against
	* the x variable, coloured by zone group, with a fitted line.
	local c1 $giblue
	local c2 $teal
	foreach x in rec_rate pred_attend_rate disadv_rate {
		levelsof school_level
		foreach l in `r(levels)' {
			local opts2 msymbol(o) mlab(s2) mlabsize(2) mlabpos(9) //mlabangle(345) msize(.5)
			local weight "" //[fweight=att_num]
			local gap 4
			
			* x-axis ticks every 10 points: the maximum plus the gap is rounded
			* down to a multiple of 10 and the minimum less the gap is rounded
			* up; each tick is labelled with a percent sign.
			sum `x' if school_level==`l'
			local max=floor(`=(`r(max)'+`gap')/10')*10
			local min=ceil(`=(`r(min)'-`gap')/10')*10
			local xl
			dis `"`min'(10)`max'"'
			forval j=`min'(10)`max' {
				local xl `xl' `j' "`j'%"
			}
			
			* Same tick construction for the y axis.
			sum attend_rate if school_level==`l'
			local max=floor(`=(`r(max)'+`gap')/10')*10
			local min=ceil(`=(`r(min)'-`gap')/10')*10
			local yl
			dis `"`min'(10)`max'"'
			forval j=`min'(10)`max' {
				local yl `yl' `j' "`j'%"
			}
			
			dis `"`xl'"'
			dis `"`yl'"'
			* Three scatter layers (half equal to 1, 2 and 3) plus the fitted line.
			twoway ///
			(scatter attend_rate `x' `weight' if half==1 & school_level==`l' , `opts2' mcolor(`"`c1'"') mlabcolor(`"`c1'"'))  ///
			(scatter attend_rate `x' `weight' if half==2 & school_level==`l' , `opts2' mcolor(`"`c2'"') mlabcolor(`"`c2'"')) ///
			(scatter attend_rate `x' `weight' if half==3 & school_level==`l' , `opts2' mcolor(black) mlabcolor(black)) ///
			(lfit attend_rate `x' if school_level==`l' , lcolor(`"${giblue}"') lw(thin)) ///
			, ylab(`yl') xlab(`xl') title(`""') xtitle("`:var lab `x''") ytitle("Summer Learning Attendance Rate") legend(row(1) lab(1 "Zones 1-3") lab(2 "Zones 4-7") lab(3 "Virtual") lab(4 "Line of Best Fit") pos(6)) scheme(cleanplots)
			
			* Export the figure, then copy it to the sp folder.
			graphsout "${output}/figures/scatter_`x'_sl`l'", type(${graph}) replace 
			copy "${output}/figures/scatter_`x'_sl`l'.${graph}" "${sp}/scatter_`x'_sl`l'.${graph}", replace
			*if `l'==3 stop
		}
	}
	corr vir_sl attend_rate	
	* Export the flagged schools. Every kept variable whose name starts with r
	* is a rounded percentage and is converted back to a proportion first.
	keep if exceptional==1
	keep school_level sy_school zone rdisadv_rate rrec_rate rpred_attend_rate rattend_rate rdiff rvir_sl 
	order school_level sy_school zone rdisadv_rate rrec_rate rpred_attend_rate rattend_rate rdiff rvir_sl  
	foreach v of varlist r* {
		replace `v'=`v'/100
	} 
	gsort school_level -rdiff
	export excel using "${sp}/top_sending_schools.xlsx", firstrow(var) replace
	restore
}
*****************************End Residual Analysis******************************
*****************************End Residual Analysis******************************
*****************************End Residual Analysis******************************

*********************************Margins Plots**********************************
*********************************Margins Plots**********************************
*********************************Margins Plots**********************************
* Margins plots; runs only when the margins switch is set to 1.
* For every covariate and test variable, the saved margins datasets of the
* three outcomes are stacked and drawn as one bar chart of predicted
* percentages, with one group of bars per outcome.
* Requires the margins datasets written by the regression section.
if $margins == 1 {
	* Group titles shown under each outcome's bars, in the order of the dv list.
	local sup1 Recommended for SL
	local sup2 Participated in SL
	local sup3 ≥20% Absence Rate
	local sups 3
	
	* at counts the position of the current predictor. For full-model covariates
	* the at-value column is assumed to be _at followed by that position, which
	* relies on iv2 listing the covariates in the same order as the model - TODO confirm.
	local at 1
	foreach ind of varlist $iv2 $tests {
		* Stack the outcome files. n is the bar position: the row number plus
		* one extra unit per earlier outcome, which leaves a gap between groups.
		local i 1
		foreach v in $dv { //combine those margins for all the ind. vars. of each regression
			if `i'==1 {
				use "${results}/impact_estimates/`v'_`ind'_margins.dta", clear 
				gen dv="`v'"
				gen n=_n+`i'-1
			}
			else {
				qui append using "${results}/impact_estimates/`v'_`ind'_margins.dta"
				replace dv="`v'" if mi(dv)
				replace n=_n+`i'-1 if mi(n)
			}
			local ++i
		}
		* kv names the column that holds the predictor's values.
		if strpos(`"`ind'"' , "ms_quart")==0 & strpos(`"`ind'"' , "zone")==0 & strpos(`"`ind'"' , "gpa")==0 local kv _at`at'
		else local kv _at1 //for test score regressions
		* For race5, shorten each value label to its first character.
		if `"`ind'"'==`"race5"' {
			decode `kv', gen(`kv'2)
			drop `kv'
			replace `kv'2=substr(`kv'2, 1, 1)
			encode `kv'2, gen(`kv')
		}
		*!BF Note 11/10/2023: this I have never seen before! The margins command couldn't converge
		if strpos(`"`ind'"' , "ms_quart")==1 {
			replace _margin=1 if _margin==.
		}  
		qui sum `kv'
		local span=`r(max)'-`r(min)'
		*the above only works if the categories have increments of one
		gen perc=_margin*100
		gen id=_n
		* Minor x ticks: one per bar, labelled with the predictor's value label.
		local xm 
		levelsof id
		foreach l in `r(levels)' {
			local xm `xm' `=n[`l']' "`: label (`kv') `=`kv'[`l']''"
		}
		* y ticks every 10 points from 0 up to the next multiple of 10 above the largest bar.
		qui sum perc
		local max=ceil(`=`r(max)'/10')*10
		local yl
		forval j=0(10)`max' {
			local yl `yl' `j' "`j'%"
		}
		* Major x ticks: one per outcome group, placed at the centre of the group.
		local xl 
		forval j=1/`sups' {
			local xl `xl' `=((`j'-1)*(`span'+2)+1)+(`span'/2)' `"`sup`j''"'
		}
		gen str perc2=string(round(perc,1))
		replace perc2=perc2+"%"
		
		dis `"xmlab = `xm'"'
		dis `"ylab = `yl'"'
		dis `"xlab = `xl'"'
		*!BF Note 8/10/2023: yay for algebra! 

		global lab_all barw(.95) lcolor(none) mlabsize(3) mlabcolor(black) mlabpos(12) mlabf(%3.0f) mlab(perc2)
		*two colors 
		* Count the distinct at-scenarios. If the captured call fails, the
		* count is set to missing so that the one-colour chart is drawn
		* instead of stopping on an empty macro.
		capture distinct _at
		local n_at = cond(_rc==0, r(ndistinct), .)
		* The two-colour chart assumes the two levels are coded 0 and 1. It
		* uses kv, the column resolved above, so that a two-level test
		* variable does not reference an at-value column that does not exist.
		if `n_at'==2 {
			twoway (bar perc n if `kv'==0, fcolor(`"${giblue}"') ${lab_all}) (bar perc n if `kv'==1, lcolor(none) fcolor(`"${teal}"') ${lab_all})  , scheme(cleanplots) ylab(`yl') xlab(`xl', labsize(3.5)) xtitle("") ytitle("Percent of Students") legend(label(1 `"`:label (`kv') 0'"') label(2 `"`:label (`kv') 1'"') pos(6) rows(1)) //xmlab(`xm' , labsize(3))  
		}
		*one color
		else {
			twoway bar perc n , scheme(cleanplots) fcolor(`"${giblue}"') ylab(`yl') xlab(`xl', labgap(4) labsize(3.5)) xtitle("") ytitle("Percent of Students") xmlab(`xm' , labsize(3)) ${lab_all}
		}
		* Export the figure, then copy it to the sp folder.
		graphsout "${output}/figures/att_`ind'", type(${graph}) replace
		copy "${output}/figures/att_`ind'.${graph}" "${sp}/att_`ind'.${graph}", replace
		*if `"`ind'"'==`"disadv"' STOP
		local ++at
	}

}
*******************************End Margins Plots********************************
*******************************End Margins Plots********************************
*******************************End Margins Plots********************************



* Close the log (presumably opened by the preamble call through its log() option - confirm).
log close